#######################
# 1. Prepare dataset
#######################

###
# 0. Load packages and CCES data

# load packages
library(haven)
library(missForest)

# Read the two Stata inputs from the working directory: the survey file and
# the file of Yale CFscore matches.
cces12 <- read_dta(file = "cces12.dta")
yale_match <- read_dta(file = "yale_match.dta")

###

# 1. Create new CES variables

# a. Assign party label from pid7
#    pid7 below 4                   -> "Democrat"
#    pid7 strictly between 4 and 8  -> "Republican"
#    any other non-missing value    -> "Independent"
#    missing pid7                   -> NA (ifelse() propagates the NA test)
# NOTE(review): presumably pid7 is the 7-point party ID scale with extra
# codes at 8 and above -- confirm against the codebook.
cces12$party_id <- with(
  cces12,
  ifelse(
    pid7 < 4,
    "Democrat",
    ifelse(pid7 > 4 & pid7 < 8, "Republican", "Independent")
  )
)

# b. Create issue variables
# Every column appended between here and step c is collected BY POSITION into
# `policies`, so do not add unrelated columns to cces12 inside this section.
cols_pre <- NCOL(cces12) + 1  # index of the first issue column

# Copy each survey item under a readable name (the source column is kept).
cces12$ACA <- cces12$CC332I
cces12$climatechange <- cces12$CC321
cces12$birthcontrol <- cces12$CC332E
cces12$gaymarriage <- cces12$CC326
cces12$affaction <- cces12$CC327
cces12$guncontrol <- cces12$CC320
cces12$imm.profiling <- cces12$CC322_3
cces12$env.econ <- cces12$CC325
cces12$abortion <- cces12$CC324
cces12$dadt <- cces12$CC332J
cces12$imm.birthrightcit <- cces12$CC322_6
cces12$imm.amnesty <- cces12$CC322_1
cces12$simpsonbowles <- cces12$CC332B
cces12$repealaca <- cces12$CC332G
cces12$imm.borderpatrol <- cces12$CC322_2
cces12$imm.denyservices <- cces12$CC322_5
cces12$ryanbudget <- cces12$CC332A
cces12$taxcutact <- cces12$CC332C
cces12$taxhikeprevact <- cces12$CC332D
cces12$uskoreatrade <- cces12$CC332F
cces12$keystonepipe <- cces12$CC332H
cces12$iraqmistake <- cces12$CC305
cces12$afghanmistake <- cces12$CC306
cces12$imm.finebusinesses <- cces12$CC322_4

# Take as factor answers to the two-part budget preferences question:
# one level per observed "CC328 CC329" combination. A respondent missing
# either part is set to NA *before* the factor is built, so the factor does
# not carry unused levels such as "1 NA" or "NA NA".
cces12$budgetprefs <- with(cces12, factor(ifelse(
  is.na(CC328) | is.na(CC329),
  NA_character_,
  paste(as.character(CC328), as.character(CC329))
)))


# c. Collect variable names into a vector
cols_post <- NCOL(cces12)
policies <- names(cces12)[cols_pre:cols_post]  # all issue variables, in creation order
policies_nonrcv <- c('guncontrol', 'climatechange', 'imm.profiling', 'imm.birthrightcit', 'imm.amnesty',
                            'imm.borderpatrol', 'imm.denyservices', 'imm.finebusinesses', 'abortion', 'env.econ',
                            'gaymarriage', 'affaction', 'budgetprefs') # not roll call votes

###

# 2. Merge in true CFscores for donors

# a. ID actual donor CFscores as "true"
yale_match$true_CFscore <- yale_match$cfscoreR

# b. merge into CCES file
# Copy the case id under the name used in cces12 so both tables share a key.
yale_match$V101 <- yale_match$cces_caseid
# Left join on V101: all.x = TRUE keeps every cces12 row, with true_CFscore
# set to NA where yale_match has no matching case id. merge() sorts the
# result by the key by default, so the row order of cces12 may change.
# NOTE(review): merge() emits one row per matching pair, so a V101 value that
# appears more than once in yale_match would duplicate that respondent --
# confirm the ids are unique.
cces12 <- merge(cces12, yale_match[, c('V101', 'true_CFscore')], by = 'V101', all.x = TRUE)


###

# 3. Duplicate issue variables in character format, to create placeholder value for missing data

# names for the character copies: "<issue variable>_char"
policy_chars <- paste(policies, 'char', sep = '_')

# build a character copy of every issue variable, replacing missing values
# with the placeholder string "No response"
policy_df <- as.data.frame(
  lapply(cces12[policies], function(answers) {
    answers <- as.character(answers)
    answers[is.na(answers)] <- "No response"
    answers
  }),
  stringsAsFactors = FALSE
)
names(policy_df) <- policy_chars

# append the character copies to the right of the existing columns
cces12 <- cbind(cces12, policy_df)


###

# 4. Clean-up and save
# Drop every visible workspace object except the four that later steps need.
rm(list = setdiff(ls(), c("cces12", 'policies', 'policies_nonrcv', 'policy_chars')))
# Save exactly those four objects by name. save.image() would also write any
# hidden objects in the global environment (e.g. .Random.seed), which would
# then be restored whenever the file is loaded.
save(cces12, policies, policies_nonrcv, policy_chars, file = '1_output.RData')
